

* Set the working directory to the replication folder; every data path used
* below is relative to it.
* NOTE(review): this absolute path is machine-specific and has to be edited
* before the script can run anywhere else.
cd "D:\Dropbox\book_welfare\replication"

****************************************************
* make the bookstat row of the gender match table 
****************************************************

* Bookstat row, part 1: counts (N, N_name, N_female) and the range of
* publication years, restricted to pyear 1960-2021.
use "data\bookstat_gender_pyear_genre_asin.dta", clear
	keep if inrange(pyear, 1960, 2021)

	rename total N
	* N_female is computed as the residual N_name - N_male.
	gen N_female = N_name - N_male

	* Print the raw totals to the log so the collapsed row can be checked by eye.
	gen x = 1
	table x, stat(sum N) stat(sum N_name) stat(sum N_female)

	gen dataset = "Bookstat"
	collapse (sum) N N_name N_female (min) minp = pyear (max) maxp = pyear, by(dataset)
	tempfile b
	save `b'

* Bookstat row, part 2: quantities (Q, Q_name, Q_female) for the same
* publication years, using only sales years 2018-2021.
use "data\bookstat_sales_gender_pyear_genre_asin.dta", clear
	keep if inrange(pyear, 1960, 2021)
	keep if inrange(year, 2018, 2021)
	* Stored as double so large quantities are not rounded to float precision
	* before they are summed.
	gen double q_female = q_name - q_male
	gen dataset = "Bookstat"
	collapse (sum) Q = q Q_name = q_name Q_female = q_female, by(dataset)
	* Both sides hold exactly one "Bookstat" row, so anything other than a
	* match is an error; nogenerate keeps _merge out of the final table.
	merge 1:1 dataset using `b', assert(match) nogenerate
	tempfile b
	save `b'

****************************************************
* make the Goodreads row of the gender match table 
****************************************************
 	* Goodreads row, part 1: book counts and the range of publication years,
 	* restricted to pubyr 1960-2016.
 	use "data\books.dta", clear
		keep if inrange(pubyr, 1960, 2016)
		gen N = 1
		* fbookp is 1 when pmshare < .5, 0 otherwise, and missing when
		* pmshare is missing.
		gen fbookp = pmshare < .5 if pmshare != .

		gen N_name = fbookp != .
		gen N_female = fbookp == 1
		gen dataset = "Goodreads"
		collapse (sum) N N_name N_female (min) minp = pubyr (max) maxp = pubyr, by(dataset)
		tempfile g
		save `g'

	* Goodreads row, part 2: quantities for years 2007-2016 and the same
	* publication-year window.
	use "data\books_year.dta", clear
		keep if inrange(year, 2007, 2016)
		keep if inrange(pubyr, 1960, 2016)

		* fbookp is not created in this part, so it has to be a variable
		* already stored in books_year.dta.
		* Doubles avoid float rounding of large quantities before summing.
		gen double Q = q
		gen double Q_name = Q*(fbookp != .)
		gen double Q_female = Q*(fbookp == 1)

		gen dataset = "Goodreads"
		collapse (sum) Q Q_name Q_female, by(dataset)
		* Both sides hold exactly one "Goodreads" row, so anything other than
		* a match is an error; nogenerate keeps _merge out of the final table.
		merge 1:1 dataset using `g', assert(match) nogenerate
		tempfile g
		save `g'


****************************************************
* make the LOC row of the gender match table 
****************************************************

* Library of Congress row: sums N, N_name, N_female and Nmatch over all
* records in loc_summary.dta and records the first and last year.
* NOTE(review): unlike the other rows, no year restriction is applied here,
* and N_name comes from Nname while Nmatch is carried separately - confirm
* both are intended.
use "data\loc_summary.dta", clear
	rename (Nname Nfemale) (N_name N_female)

	* Print the raw totals to the log so the collapsed row can be checked by eye.
	gen x = 1
	table x, stat(sum N) stat(sum N_name) stat(sum Nmatch) stat(sum N_female)

	gen dataset = "Library of Congress"
	collapse (sum) N N_name N_female Nmatch ///
		(min) minp = year (max) maxp = year, by(dataset)
	tempfile l
	save `l'

	
****************************************************
* make the Pulitzer row of the gender match table 
****************************************************

	* Pulitzer Prize row: totals for years from 1960 on, plus the first and
	* last year present. N_name is taken from Nmatch in this dataset.
	use "data\pulitzer.dta", clear
	drop if year < 1960
	rename (Nmatch Nfemale) (N_name N_female)

	* Print the raw totals to the log so the collapsed row can be checked by eye.
	gen x = 1
	table x, stat(sum N) stat(sum N_name) stat(sum N_female)

	gen dataset = "Pulitzer Prize"
	collapse (sum) N N_name N_female ///
		(min) minp = year (max) maxp = year, by(dataset)
	tempfile p
	save `p'

	

****************************************************
* make the NBA row of the gender match table 
****************************************************


 * National Book Award row: totals for years from 1960 on, plus the first
 * and last year present.
 use "data\nba.dta", clear
	drop if year < 1960
	rename (Nname Nf) (N_name N_female)

	* Print the raw totals to the log so the collapsed row can be checked by eye.
	gen x = 1
	table x, stat(sum N) stat(sum N_name) stat(sum N_female)

	gen dataset = "National Book Award"
	collapse (sum) N N_name N_female ///
		(min) minp = year (max) maxp = year, by(dataset)
	tempfile n
	save `n'

****************************************************
* make the NYT fiction row of the gender match table 
****************************************************
	
  * NYT fiction row: totals for years from 1960 on, plus the first and last
  * year present.
  use "data\bestsellers.dta", clear
	drop if year < 1960
	rename (nyt_total nyt_name) (N N_name)
	* N_female is computed as the residual N_name - nyt_m.
	gen N_female = N_name - nyt_m

	* Print the raw totals to the log so the collapsed row can be checked by eye.
	gen x = 1
	table x, stat(sum N) stat(sum N_name) stat(sum N_female)

	gen dataset = "NYT fiction"
	collapse (sum) N N_name N_female ///
		(min) minp = year (max) maxp = year, by(dataset)
	tempfile nyt
	save `nyt'
****************************************************
* make the US copyright registrations row (data from another paper)
****************************************************
	
	
	
	* Copyright registrations row: keeps only records whose cleantype is
	* "Non-dramatic literary work", then sums N, Nmatch (as N_name) and
	* Nfemale (as N_female) and records the first and last year.
	* NOTE(review): the file name is spelled "reg_sumary" - confirm it matches
	* the file on disk.
	use "data\reg_sumary.dta", clear
	keep if cleantype == "Non-dramatic literary work"

	gen dataset = "Copyright registrations"
	collapse (sum) N N_name = Nmatch N_female = Nfemale ///
		(min) minp = year (max) maxp = year, by(dataset)
	tempfile cr
	save `cr'
	
	
	* Assemble the gender match table: stack the seven dataset rows saved
	* above and compute the shares. A little tweaking in Excel makes it pretty.
	use `b', clear
	foreach part in g l p n nyt cr {
		append using ``part''
	}

	* Shares by count (no prefix) and by quantity (prefix q):
	*   pctid  = named / total
	*   pctfid = female / named
	*   pctf   = female / total
	* Only the Bookstat and Goodreads rows carry Q variables, so the q-shares
	* are missing for every other row.
	foreach base in N Q {
		local pre = cond("`base'" == "Q", "q", "")
		gen `pre'pctid = `base'_name/`base'
		gen `pre'pctfid = `base'_female/`base'_name
		gen `pre'pctf = `base'_female/`base'
	}

	* Report Q in thousands; this comes after the shares, which use the raw Q.
	replace Q = Q/1000
	format N %11.0g

	local cols dataset N pctid  pctfid pctf Q   qpctid  qpctfid qpctf minp maxp
	order `cols'
	browse `cols'
